import scrapy
from Qiusbk.items import QiusbkItem


class QsbkSpider(scrapy.Spider):
    """Spider that scrapes joke authors and bodies from qiushibaike.com.

    Results are emitted as ``QiusbkItem`` objects and handed off to the
    configured item pipelines for storage (pipeline storage, as opposed to
    an earlier terminal-storage variant that returned a list of dicts).
    """

    name = 'qsbk'
    # allowed_domains = ['www.xxx.com']
    start_urls = ['https://www.qiushibaike.com/text/']

    def parse(self, response):
        """Parse the text-jokes listing page.

        :param response: Scrapy response for a ``start_urls`` page.
        :yields: one ``QiusbkItem`` (fields ``author`` and ``text``) per joke.
        """
        # Each <div> under the content container is one joke entry.
        for div in response.xpath('//*[@id="content"]/div/div[2]/div'):
            author = div.xpath('.//h2/text()')[0].extract()
            # The joke body may be split across several text nodes
            # (e.g. around <br> tags); join them into a single string.
            text = ''.join(
                div.xpath('./a[@class="contentHerf"]/div/span//text()').extract()
            )

            item = QiusbkItem()
            item['author'] = author
            item['text'] = text

            # Hand the item to the pipelines for storage.
            yield item